This page describes plotting the STRUCTURE plots generated for Ceratopipra rubrocapilla. This dataset has 69 samples, and STRUCTURE was run on a dataset of 7865 SNPs.

First, I will load some functions to make it more compact to load a lot of data. These functions are custom for loading the q matrices output by STRUCTURE for my dataset: they load the data and then combine it with my sample metadata. I also have functions for reading the outfiles from clumpp, and a function for plotting the results.

load_Ceratopiprak2 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k2 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2"), sep = " ")
  CerySTRUCTURE_data_k2 <- merge(CerySTRUCTURE_data_k2, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k2_long <- CerySTRUCTURE_data_k2 %>%
  arrange(Longitude) %>%
  arrange(Population)%>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p2) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k2_long)
}
load_Ceratopiprak3 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k3 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3"), sep = " ")
  CerySTRUCTURE_data_k3 <- merge(CerySTRUCTURE_data_k3, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k3_long <- CerySTRUCTURE_data_k3 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p3) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k3_long)
}
load_Ceratopiprak4 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k4 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3", "p4"), sep = " ")
  CerySTRUCTURE_data_k4 <- merge(CerySTRUCTURE_data_k4, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k4_long <- CerySTRUCTURE_data_k4 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p4) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k4_long)
}
load_Ceratopiprak5 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k5 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3", "p4", "p5"), sep = " ")
  CerySTRUCTURE_data_k5 <- merge(CerySTRUCTURE_data_k5, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k5_long <- CerySTRUCTURE_data_k5 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p5) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k5_long)
}
load_Ceratopiprak6 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k6 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3", "p4", "p5", "p6"), sep = " ")
  CerySTRUCTURE_data_k6 <- merge(CerySTRUCTURE_data_k6, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k6_long <- CerySTRUCTURE_data_k6 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p6) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k6_long)
}

load_Ceratopiprak7 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k7 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3", "p4", "p5", "p6", "p7"), sep = " ")
  CerySTRUCTURE_data_k7 <- merge(CerySTRUCTURE_data_k7, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k7_long <- CerySTRUCTURE_data_k7 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p7) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k7_long)
}

load_Ceratopiprak8 <- function(species, Dataset, dataset=paste(Dataset, ".", sep=""), folder, subfolder="run_1"){
  CerySTRUCTURE_data_k8 <- read_delim(col_names=c("col1"), file=paste("../", species, "/", dataset, folder, "/", subfolder, "/", Dataset, ".res_q", sep=""), delim="\t", trim_ws=TRUE, show_col_types = FALSE) %>%
  separate(., col1, into = c("Code", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8"), sep = " ")
  CerySTRUCTURE_data_k8 <- merge(CerySTRUCTURE_data_k8, rubrocapilla_metadata, by="Code")
  CerySTRUCTURE_data_k8_long <- CerySTRUCTURE_data_k8 %>%
  arrange(Longitude) %>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p8) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(CerySTRUCTURE_data_k8_long)
}

plot_Ceratopipra <- function(CerySTRUCTURE_data, title="STRUCTURE plot"){
  ggplot(CerySTRUCTURE_data, aes(factor(Code), as.numeric(assignment_p), fill = factor(pop))) +
  geom_col(color = "gray", size = 0.1) +
  theme_minimal() + labs(x = "Individuals", title = title, y = "Ancestry") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = expand_scale(add = 1)) +
  theme(panel.spacing.x = unit(0.1, "lines"), panel.grid = element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  #theme(axis.text.x = element_blank())+
  scale_fill_gdocs(guide = FALSE)+
  facet_grid(~factor(Population, levels=c("cornuta","mentalis","Trinidad","Guiana","Jaú","Panama","Imeri","Napo","chloromeros","Inambari","Rondonia","Tapajos","Xingu/Belém","AtlanticForest")), scales = "free")
}

plot_Ceratopipra2 <- function(CerySTRUCTURE_data, title="STRUCTURE plot"){
  ggplot(CerySTRUCTURE_data, aes(factor(Code), as.numeric(assignment_p), fill = factor(pop))) +
  geom_col(color = "gray", size = 0.1) +
  theme_minimal() + labs(x = "Individuals", title = title, y = "Ancestry") +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_discrete(expand = expand_scale(add = 1)) +
  theme(panel.spacing.x = unit(0.1, "lines"), panel.grid = element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  #theme(axis.text.x = element_blank())+
  scale_fill_gdocs(guide = FALSE)+
  facet_grid(. ~factor(Population, levels=c("cornuta","mentalis","Trinidad","Guiana","Jaú","Panama","Imeri","Napo","chloromeros","Inambari","Rondonia","Tapajos","Xingu/Belém","AtlanticForest")), scales = "free", space = "free")
}

load_clumppk2 <- function(species, Dataset, folder="100k100k_POPALPHAS", greedy, latlon="latitude"){
  if (greedy==FALSE) {
      CerySTRUCTURE_data_k2 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k2.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k2 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k2.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k2$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k2 <- merge(CerySTRUCTURE_data_k2, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k2_long <- CerySTRUCTURE_data_k2 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p2) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k2_long <- CerySTRUCTURE_data_k2 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p2) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
  return(CerySTRUCTURE_data_k2_long)
}

load_clumppk3 <- function(species, Dataset, folder="100k100k_POPALPHAS", greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k3 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k3.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k3 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k3.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k3$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k3 <- merge(CerySTRUCTURE_data_k3, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k3_long <- CerySTRUCTURE_data_k3 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p3) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k3_long <- CerySTRUCTURE_data_k3 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p3) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
  return(CerySTRUCTURE_data_k3_long)
}

load_clumppk4 <- function(species, Dataset, folder="100k100k_POPALPHAS", greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k4 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k4.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k4 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k4.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k4$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k4 <- merge(CerySTRUCTURE_data_k4, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k4_long <- CerySTRUCTURE_data_k4 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p4) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k4_long <- CerySTRUCTURE_data_k4 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p4) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
}

load_clumppk5 <- function(species, Dataset, folder="100k100k_POPALPHAS", greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k5 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k5.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k5 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k5.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k5$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k5 <- merge(CerySTRUCTURE_data_k5, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k5_long <- CerySTRUCTURE_data_k5 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p5) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k5_long <- CerySTRUCTURE_data_k5 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p5) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
  return(CerySTRUCTURE_data_k5_long)
}

load_clumppk6 <- function(species, Dataset, folder, greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k6 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k6.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k6 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k6.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k6$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k6 <- merge(CerySTRUCTURE_data_k6, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k6_long <- CerySTRUCTURE_data_k6 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p6) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k6_long <- CerySTRUCTURE_data_k6 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p6) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }

  return(CerySTRUCTURE_data_k6_long)
}

load_clumppk7 <- function(species, Dataset, folder, greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k7 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k7.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6", "p7"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k7 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k7.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6", "p7"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k7$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k7 <- merge(CerySTRUCTURE_data_k7, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k7_long <- CerySTRUCTURE_data_k7 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p7) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k7_long <- CerySTRUCTURE_data_k7 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p7) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
  return(CerySTRUCTURE_data_k7_long)
}

load_clumppk8 <- function(species, Dataset, folder="100k100k_POPALPHAS", greedy, latlon="latitude"){
  if (greedy==FALSE) {
  CerySTRUCTURE_data_k8 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/k8.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out/sample_names", sep=""))
  } else {
      CerySTRUCTURE_data_k8 <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/k8.outfile", sep=""), col.names=c("c1", "sampleID", "missingness", "c4", "c5", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8"))
  clumppnames <- read.table(file=paste("../", species, "/", Dataset, "_", folder, "/clumpp_out_greedy/sample_names", sep=""))
  }
  CerySTRUCTURE_data_k8$Code <- clumppnames[,1]
  CerySTRUCTURE_data_k8 <- merge(CerySTRUCTURE_data_k8, rubrocapilla_metadata, by="Code")
  if (latlon=="latitude") {
    CerySTRUCTURE_data_k8_long <- CerySTRUCTURE_data_k8 %>%
      arrange(Latitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p8) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  } else {
    CerySTRUCTURE_data_k8_long <- CerySTRUCTURE_data_k8 %>%
      arrange(Longitude) %>%
      arrange(Population) %>%
      mutate(Code=factor(Code, levels=Code)) %>%
      gather(key=pop, value=assignment_p, p1:p8) %>%
      group_by(pop) %>%
      mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
      ungroup() %>%
      mutate(pop=fct_reorder(pop,avg_assignment))
  }
  return(CerySTRUCTURE_data_k8_long)
}

Next I need to load the metadata for my samples. This is stored in a text file.

#read in the metadata
rubrocapilla_metadata <- read.delim(file="~/Documents/GitHub/Ceratopipra_rubrocapilla_Phylogeography/3_Structure/3.1aa_PCA/Ceratopipra_rubrocapilla_extendedmetadata.txt", sep="\t", header=TRUE)

With these functions, load the data and generate plots.

rk2.1<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_1"), title="K=2: AC4.37_0.2_5_20.0 #1")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `expand_scale()` was deprecated in ggplot2 3.3.0.
## ℹ Please use `expansion()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
rk2.2<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_2"), title="K=2: AC4.37_0.2_5_20.0 #2")
rk2.3<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_3"), title="K=2: AC4.37_0.2_5_20.0 #3")
rk2.4<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_4"), title="K=2: AC4.37_0.2_5_20.0 #4")
rk2.5<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_5"), title="K=2: AC4.37_0.2_5_20.0 #5")
rk2.6<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_6"), title="K=2: AC4.37_0.2_5_20.0 #6")
rk2.7<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_7"), title="K=2: AC4.37_0.2_5_20.0 #7")
rk2.8<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_8"), title="K=2: AC4.37_0.2_5_20.0 #8")
rk2.9<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_9"), title="K=2: AC4.37_0.2_5_20.0 #9")
rk2.10<-plot_Ceratopipra(load_Ceratopiprak2("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k2_200k1000k_POPALPHAS", subfolder="run_10"), title="K=2: AC4.37_0.2_5_20.0 #10")

rk3.1<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_1"), title="K=3: AC4.37_0.2_5_20.0 #1")
rk3.2<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_2"), title="K=3: AC4.37_0.2_5_20.0 #2")
rk3.3<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_3"), title="K=3: AC4.37_0.2_5_20.0 #3")
rk3.4<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_4"), title="K=3: AC4.37_0.2_5_20.0 #4")
rk3.5<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_5"), title="K=3: AC4.37_0.2_5_20.0 #5")
rk3.6<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_6"), title="K=3: AC4.37_0.2_5_20.0 #6")
rk3.7<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_7"), title="K=3: AC4.37_0.2_5_20.0 #7")
rk3.8<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_8"), title="K=3: AC4.37_0.2_5_20.0 #8")
rk3.9<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_9"), title="K=3: AC4.37_0.2_5_20.0 #9")
rk3.10<-plot_Ceratopipra(load_Ceratopiprak3("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k3_200k1000k_POPALPHAS", subfolder="run_10"), title="K=3: AC4.37_0.2_5_20.0 #10")

rk4.1<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_1"), title="K=4: AC4.37_0.2_5_20.0 #1")
rk4.2<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_2"), title="K=4: AC4.37_0.2_5_20.0 #2")
rk4.3<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_3"), title="K=4: AC4.37_0.2_5_20.0 #3")
rk4.4<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_4"), title="K=4: AC4.37_0.2_5_20.0 #4")
rk4.5<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_5"), title="K=4: AC4.37_0.2_5_20.0 #5")
rk4.6<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_6"), title="K=4: AC4.37_0.2_5_20.0 #6")
rk4.7<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_7"), title="K=4: AC4.37_0.2_5_20.0 #7")
rk4.8<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_8"), title="K=4: AC4.37_0.2_5_20.0 #8")
rk4.9<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_9"), title="K=4: AC4.37_0.2_5_20.0 #9")
rk4.10<-plot_Ceratopipra(load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_10"), title="K=4: AC4.37_0.2_5_20.0 #10")

rk5.1<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_1"), title="K=5: AC4.37_0.2_5_20.0 #1")
rk5.2<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_2"), title="K=5: AC4.37_0.2_5_20.0 #2")
rk5.3<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_3"), title="K=5: AC4.37_0.2_5_20.0 #3")
rk5.4<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_4"), title="K=5: AC4.37_0.2_5_20.0 #4")
rk5.5<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_5"), title="K=5: AC4.37_0.2_5_20.0 #5")
rk5.6<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_6"), title="K=5: AC4.37_0.2_5_20.0 #6")
rk5.7<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_7"), title="K=5: AC4.37_0.2_5_20.0 #7")
rk5.8<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_8"), title="K=5: AC4.37_0.2_5_20.0 #8")
rk5.9<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_9"), title="K=5: AC4.37_0.2_5_20.0 #9")
rk5.10<-plot_Ceratopipra(load_Ceratopiprak5("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k5_200k1000k_POPALPHAS", subfolder="run_10"), title="K=5: AC4.37_0.2_5_20.0 #10")

rk6.1<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_1"), title="K=6: AC4.37_0.2_5_20.0 #1")
rk6.2<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_2"), title="K=6: AC4.37_0.2_5_20.0 #2")
rk6.3<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_3"), title="K=6: AC4.37_0.2_5_20.0 #3")
rk6.4<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_4"), title="K=6: AC4.37_0.2_5_20.0 #4")
rk6.5<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_5"), title="K=6: AC4.37_0.2_5_20.0 #5")
rk6.6<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_6"), title="K=6: AC4.37_0.2_5_20.0 #6")
rk6.7<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_7"), title="K=6: AC4.37_0.2_5_20.0 #7")
rk6.8<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_8"), title="K=6: AC4.37_0.2_5_20.0 #8")
rk6.9<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_9"), title="K=6: AC4.37_0.2_5_20.0 #9")
rk6.10<-plot_Ceratopipra(load_Ceratopiprak6("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k6_200k1000k_POPALPHAS", subfolder="run_10"), title="K=6: AC4.37_0.2_5_20.0 #10")

rk7.1<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_1"), title="K=7: AC4.37_0.2_5_20.0 #1")
rk7.2<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_2"), title="K=7: AC4.37_0.2_5_20.0 #2")
rk7.3<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_3"), title="K=7: AC4.37_0.2_5_20.0 #3")
rk7.4<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_4"), title="K=7: AC4.37_0.2_5_20.0 #4")
rk7.5<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_5"), title="K=7: AC4.37_0.2_5_20.0 #5")
rk7.6<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_6"), title="K=7: AC4.37_0.2_5_20.0 #6")
rk7.7<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_7"), title="K=7: AC4.37_0.2_5_20.0 #7")
rk7.8<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_8"), title="K=7: AC4.37_0.2_5_20.0 #8")
rk7.9<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_9"), title="K=7: AC4.37_0.2_5_20.0 #9")
rk7.10<-plot_Ceratopipra(load_Ceratopiprak7("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k7_200k1000k_POPALPHAS", subfolder="run_10"), title="K=7: AC4.37_0.2_5_20.0 #10")

rk8.1<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_1"), title="K=8: AC4.37_0.2_5_20.0 #1")
rk8.2<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_2"), title="K=8: AC4.37_0.2_5_20.0 #2")
rk8.3<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_3"), title="K=8: AC4.37_0.2_5_20.0 #3")
rk8.4<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_4"), title="K=8: AC4.37_0.2_5_20.0 #4")
rk8.5<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_5"), title="K=8: AC4.37_0.2_5_20.0 #5")
rk8.6<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_6"), title="K=8: AC4.37_0.2_5_20.0 #6")
rk8.7<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_7"), title="K=8: AC4.37_0.2_5_20.0 #7")
rk8.8<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_8"), title="K=8: AC4.37_0.2_5_20.0 #8")
rk8.9<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_9"), title="K=8: AC4.37_0.2_5_20.0 #9")
rk8.10<-plot_Ceratopipra(load_Ceratopiprak8("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k8_200k1000k_POPALPHAS", subfolder="run_10"), title="K=8: AC4.37_0.2_5_20.0 #10")

#load clumpp results with fullsearch algorithm
rc2 <- plot_Ceratopipra(load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=FALSE), title="K=2: AC4.37_0.2_5_20.0 CLUMPP")
rc3<-plot_Ceratopipra(load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=FALSE), title="K=3: AC4.37_0.2_5_20.0 CLUMPP")

#greedy algorithm of clumpp is needed to process K larger than 3
rcg2 <- plot_Ceratopipra(load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=2: AC4.37_0.2_5_20.0 CLUMPP")
rcg3 <- plot_Ceratopipra(load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=3: AC4.37_0.2_5_20.0 CLUMPP")
rcg4 <- plot_Ceratopipra(load_clumppk4(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=4: AC4.37_0.2_5_20.0 CLUMPP")
rcg5 <- plot_Ceratopipra(load_clumppk5(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=5: AC4.37_0.2_5_20.0 CLUMPP")
rcg6 <- plot_Ceratopipra(load_clumppk6(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=6: AC4.37_0.2_5_20.0 CLUMPP")
rcg7 <- plot_Ceratopipra(load_clumppk7(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=7: AC4.37_0.2_5_20.0 CLUMPP")
rcg8 <- plot_Ceratopipra(load_clumppk8(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=8: AC4.37_0.2_5_20.0 CLUMPP")

#plot clumpp results from fullsearch algorithm
ggarrange(rc2+theme(axis.text.x=element_blank()), rc3+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4"), ncol = 2, nrow = 2)
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#plot clumpp results from greedy algorithm
ggarrange(rcg2+theme(axis.text.x=element_blank()), rcg3+theme(axis.text.x=element_blank()), rcg4+theme(axis.text.x=element_blank()), rcg5+theme(axis.text.x=element_blank()), rcg6+theme(axis.text.x=element_blank()), rcg7+theme(axis.text.x=element_blank()), rcg8+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7" ,"8"), ncol = 2, nrow = 4)

#plot all ten iterations for each value of K
ggarrange(rk2.1+theme(axis.text.x=element_blank()), rk2.2+theme(axis.text.x=element_blank()), rk2.3+theme(axis.text.x=element_blank()), rk2.4+theme(axis.text.x=element_blank()), rk2.5+theme(axis.text.x=element_blank()), rk2.6+theme(axis.text.x=element_blank()), rk2.7+theme(axis.text.x=element_blank()), rk2.8+theme(axis.text.x=element_blank()), rk2.9+theme(axis.text.x=element_blank()), rk2.10+theme(axis.text.x=element_blank()), rc2+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "clumpp"), ncol = 3, nrow = 4)

ggarrange(rk3.1+theme(axis.text.x=element_blank()), rk3.2+theme(axis.text.x=element_blank()), rk3.3+theme(axis.text.x=element_blank()), rk3.4+theme(axis.text.x=element_blank()), rk3.5+theme(axis.text.x=element_blank()), rk3.6+theme(axis.text.x=element_blank()), rk3.7+theme(axis.text.x=element_blank()), rk3.8+theme(axis.text.x=element_blank()), rk3.9+theme(axis.text.x=element_blank()), rk3.10+theme(axis.text.x=element_blank()), rc3+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "clumpp"), ncol = 3, nrow = 4)

ggarrange(rk4.1+theme(axis.text.x=element_blank()), rk4.2+theme(axis.text.x=element_blank()), rk4.3+theme(axis.text.x=element_blank()), rk4.4+theme(axis.text.x=element_blank()), rk4.5+theme(axis.text.x=element_blank()), rk4.6+theme(axis.text.x=element_blank()), rk4.7+theme(axis.text.x=element_blank()), rk4.8+theme(axis.text.x=element_blank()), rk4.9+theme(axis.text.x=element_blank()), rk4.10+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ncol = 3, nrow = 4)

ggarrange(rk5.1+theme(axis.text.x=element_blank()), rk5.2+theme(axis.text.x=element_blank()), rk5.3+theme(axis.text.x=element_blank()), rk5.4+theme(axis.text.x=element_blank()), rk5.5+theme(axis.text.x=element_blank()), rk5.6+theme(axis.text.x=element_blank()), rk5.7+theme(axis.text.x=element_blank()), rk5.8+theme(axis.text.x=element_blank()), rk5.9+theme(axis.text.x=element_blank()), rk5.10+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ncol = 3, nrow = 4)

ggarrange(rk6.1+theme(axis.text.x=element_blank()), rk6.2+theme(axis.text.x=element_blank()), rk6.3+theme(axis.text.x=element_blank()), rk6.4+theme(axis.text.x=element_blank()), rk6.5+theme(axis.text.x=element_blank()), rk6.6+theme(axis.text.x=element_blank()), rk6.7+theme(axis.text.x=element_blank()), rk6.8+theme(axis.text.x=element_blank()), rk6.9+theme(axis.text.x=element_blank()), rk6.10+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ncol = 3, nrow = 4)

ggarrange(rk7.1+theme(axis.text.x=element_blank()), rk7.2+theme(axis.text.x=element_blank()), rk7.3+theme(axis.text.x=element_blank()), rk7.4+theme(axis.text.x=element_blank()), rk7.5+theme(axis.text.x=element_blank()), rk7.6+theme(axis.text.x=element_blank()), rk7.7+theme(axis.text.x=element_blank()), rk7.8+theme(axis.text.x=element_blank()), rk7.9+theme(axis.text.x=element_blank()), rk7.10+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ncol = 3, nrow = 4)

ggarrange(rk8.1+theme(axis.text.x=element_blank()), rk8.2+theme(axis.text.x=element_blank()), rk8.3+theme(axis.text.x=element_blank()), rk8.4+theme(axis.text.x=element_blank()), rk8.5+theme(axis.text.x=element_blank()), rk8.6+theme(axis.text.x=element_blank()), rk8.7+theme(axis.text.x=element_blank()), rk8.8+theme(axis.text.x=element_blank()), rk8.9+theme(axis.text.x=element_blank()), rk8.10+theme(axis.text.x=element_blank()), labels=c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"), ncol = 3, nrow = 4)

Results:
* The ten runs for K=2 look qualitatively indistinguishable (quantitatively, the numbers are ever-so-slightly different, so they are not just accidental duplicates of the exact same run).
* The ten runs for K=3 look qualitatively indistinguishable (quantitatively, the numbers are ever-so-slightly different, so they are not just accidental duplicates of the exact same run).
* The ten runs for K=4 look qualitatively indistinguishable (quantitatively, the numbers are ever-so-slightly different, so they are not just accidental duplicates of the exact same run).
* K=5-8 did not converge; there are qualitative differences amongst the ten runs. 1,000,000 generations may have been insufficient length of the MCMC chain, or the data may not have a single good fit to models with that many populations (either because the data lacks sufficient signal or there is no realistic biological signal to be found).
* The greedy and fullsearch algorithm gave identical results for K=2 and K=3 (fullsearch could not complete for higher K due to computational limitations - would have taken many days).

Evaluate best K

After viewing the results, we can evaluate the best K to report. Looking at the results, K=2-4 converged, and the clusters seem to correspond to geographic groupings that we would expect to be differentiated. K=5-8 did not converge, and most replicates contain “ghost” populations (clusters in which no sample had more than 50% assignment to that cluster).

One metric to help choose the best K to report is MedMedK. To use this metric, we need to count how many clusters correspond to reasonable populations based on prior knowledge of the samples (eg., geography). We need to determine whether any population has a median of more than 50% assignment to a given cluster for a given iterations. This can be done by eye for most of them, but a few are on the edge, so we should look at the exact values.

#This loads the data for a given run, excludes the Xingu headwaters, and calculates the median and mean assignment to each cluster for each a priori population
load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_10") %>%
  filter(!SampleID=="Ceratopipra_rubrocapilla_MT087") %>% #for Xingu clusters, we should exclude the two headwater samples, which likely spread east from a Rondonia origin.
  filter(!SampleID=="Ceratopipra_rubrocapilla_MT123") %>%
  group_by(Population, pop) %>%
  summarise(mean=mean(as.numeric(assignment_p)), median=median(as.numeric(assignment_p))) %>%
  arrange(desc(median))
## `summarise()` has grouped output by 'Population'. You can override using the
## `.groups` argument.
## # A tibble: 20 × 4
## # Groups:   Population [5]
##    Population     pop      mean  median
##    <chr>          <fct>   <dbl>   <dbl>
##  1 AtlanticForest p1    0.957   0.954  
##  2 Inambari       p3    0.928   0.934  
##  3 Rondonia       p3    0.595   0.599  
##  4 Tapajos        p2    0.542   0.516  
##  5 Xingu/Belém    p4    0.456   0.435  
##  6 Rondonia       p2    0.381   0.381  
##  7 Tapajos        p3    0.331   0.299  
##  8 Xingu/Belém    p1    0.232   0.231  
##  9 Xingu/Belém    p2    0.214   0.228  
## 10 Inambari       p2    0.0688  0.0633 
## 11 Xingu/Belém    p3    0.0986  0.0614 
## 12 Tapajos        p4    0.0703  0.0588 
## 13 Tapajos        p1    0.0567  0.0312 
## 14 AtlanticForest p4    0.0299  0.027  
## 15 AtlanticForest p2    0.00928 0.0049 
## 16 Rondonia       p4    0.0111  0.0037 
## 17 AtlanticForest p3    0.0043  0.00325
## 18 Rondonia       p1    0.0125  0.003  
## 19 Inambari       p4    0.00215 0.00165
## 20 Inambari       p1    0.00149 0.00115
#Xingu samples from Araguaina seem to be have different assignment values than other Xingu locations.
load_Ceratopiprak4("Ceratopipra_rubrocapilla", "Cerrub.AC4.37_0.2_5_20.0", folder="k4_200k1000k_POPALPHAS", subfolder="run_10") %>%
  filter(Locality=="Araguaina") %>% 
  group_by(Population, pop) %>%
  summarise(mean=mean(as.numeric(assignment_p)), median=median(as.numeric(assignment_p))) %>%
  arrange(desc(median))
## `summarise()` has grouped output by 'Population'. You can override using the
## `.groups` argument.
## # A tibble: 4 × 4
## # Groups:   Population [1]
##   Population  pop     mean median
##   <chr>       <fct>  <dbl>  <dbl>
## 1 Xingu/Belém p4    0.702   0.736
## 2 Xingu/Belém p1    0.232   0.235
## 3 Xingu/Belém p2    0.0522  0.021
## 4 Xingu/Belém p3    0.0134  0.008

For example, iteration 10 of K=4 shows that Atlantic Forest has more than 50% median assignment to p1, so that is a valid cluster. Inambari and Rondonia have more than 50% median assignment to p3, so that is a valid cluster. Tapajos has more than 50% median assignment to p2, so that is a valid cluster. Araguaina has more than 50% median assignment to p4, so that is a valid cluster. This process repeats for each of ten iterations for each K, and then we take the median of those values for each K (note that there is no variation amongst K=2-4, so taking the median vs mean makes no difference).

save figures

Now we can save some images to use as figures in the manuscript and to keep a record.

#Save K=2 and 3 with the fullsearch algorithm of CLUMPP (fullsearch could not completefor K=4)
cbbPalette <- c("#E69F00", "#0072B2", "#F0E442", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=FALSE), title="K=2: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K2.200k1000k_POPALPHAS.fullsearch.pdf", bg='transparent', width = 30, height = 10, units = "cm")

cbbPalette <- c("#F0E442", "#E69F00", "#0072B2", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=FALSE), title="K=3: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K3.200k1000k_POPALPHAS.fullsearch.pdf", bg='transparent', width = 30, height = 10, units = "cm")


#Save K=2-8 with the greedy algorithm of CLUMPP (fullsearch could not completefor K=4+)
cbbPalette <- c("#E69F00", "#0072B2", "#F0E442", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=2: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K2.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")
plot_Ceratopipra2(load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE, latlon="longitude"), title="K=2: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K2.200k1000k_POPALPHAS.longitude.pdf", bg='transparent', width = 30, height = 10, units = "cm")


cbbPalette <- c("#F0E442", "#E69F00", "#0072B2", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=3: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K3.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")
plot_Ceratopipra2(load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE, latlon="longitude"), title="K=3: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K3.200k1000k_POPALPHAS.longitude.pdf", bg='transparent', width = 30, height = 10, units = "cm")

cbbPalette <- c("#F0E442", "#E69F00", "#009E73", "#CC79A7", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk4(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=4: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K4.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")
plot_Ceratopipra2(load_clumppk4(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE, latlon="longitude"), title="K=4: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K4.200k1000k_POPALPHAS.longitude.pdf", bg='transparent', width = 30, height = 10, units = "cm")


cbbPalette <- c("#F0E442", "#E69F00", "#CC79A7", "#009E73", "#000000", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk5(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=5: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K5.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")

cbbPalette <- c("#F0E442", "#0072B2", "#E69F00", "#CC79A7", "#009E73", "#000000", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_clumppk6(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=6: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K6.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")

cbbPalette <- c("#0072B2", "#F0E442", "#56B4E9", "#E69F00", "#CC79A7", "#009E73", "#000000")
plot_Ceratopipra2(load_clumppk7(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=7: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K7.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")

cbbPalette <- c("brown", "#0072B2", "#F0E442", "#56B4E9", "#E69F00", "#CC79A7", "#009E73", "#000000")
plot_Ceratopipra2(load_clumppk8(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE), title="K=8: AC4.37_0.2_5_20.0 CLUMPP")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("./STRUCTURE.AC4.37_0.2_5_20.0.K8.200k1000k_POPALPHAS.pdf", bg='transparent', width = 30, height = 10, units = "cm")

Evanno Figure

Now let’s generate a figure showing the delta K values for each value of K.

Evanno <- NULL
Evanno$K_values <- 1:8
Evanno$delta_K <- as.numeric(c("NA", "999.100254", "79.851556", "117.21132", "1.078186", "61.797488", "1.909957", "NA"))
## Warning: NAs introduced by coercion
Evanno$LnPK <- as.numeric(c("-275915.79", "-266991.56", "-265950.9", "-266507.28", "-306063.34", "-267299.72", "-269712.03", "-271162.85"))

ggplot(as.data.frame(Evanno), aes(x=K_values, y=delta_K))+
  geom_point()+
  geom_line()+
  xlab("K")+
  ylab("Delta K")+
  theme_classic()
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing missing values (`geom_line()`).

#ggsave("Evanno.pdf", bg='transparent', width = 30, height = 10, units = "cm")

ggplot(as.data.frame(Evanno), aes(x=K_values, y=LnPK))+
  geom_point()+
  geom_line()+
  xlab("K")+
  ylab("Ln PK")+
  theme_classic()

map figure

Now let’s put the STRUCTURE plot bars onto a map so that we can see how the structure population assignments are distributed. Often authors will opt to aggregate samples from the same locality and then plot an average as a pie chart. I do not personally like this as it masks information about how variable samples are within a locality. I also prefer bars over pies so I will plot bars on the map.

# Load required libraries
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
#load the range map, outline of the land, and rivers. This data will not be available in the repository because I do not have permissions to redistribute the raw data.

#load the range of C. rubrocapilla
load("~/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/Cerrub_range.RData")
#read the shapefile of rivers
brazil_rivers <- st_read("~/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_rivers_shapefile/ne_10m_rivers_lake_centerlines.shp")
## Reading layer `ne_10m_rivers_lake_centerlines' from data source 
##   `/Users/elsemikkelsen/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_rivers_shapefile/ne_10m_rivers_lake_centerlines.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1473 features and 38 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -164.9035 ymin: -52.15775 xmax: 177.5204 ymax: 75.79348
## Geodetic CRS:  WGS 84
# Filter to keep only major rivers of interest (otherwise the map would be too messy and hard to read with all the small rivers plotted)
#brazil_rivers <- brazil_rivers[brazil_rivers$name %in% c("Amazonas", "Madeira", "Tapajós", "Xingu", "Tocantins", "Rio Negro", "Teles Pires", "Juruena", "Jamanxim", "Arapiuns", "Crepori", "Cupari", "Purus", "Baía de Marajó", "São Francisco", "Araguaia", "Madre de Dios", "Marañón", "Branco", "Orinoco", "Purús", "Ucayali", "Japurá", "Caquetá"), ] #"Braco Menor", "Mamoré", "Juruá", "Jari", "Putumayo", 
#add Huallaga
brazil_rivers <- brazil_rivers[brazil_rivers$name %in% c("Amazonas", "Madeira", "Tapajós", "Xingu", "Tocantins", "Rio Negro", "Teles Pires", "Juruena", "Jamanxim", "Arapiuns", "Crepori", "Cupari", "Purus", "Baía de Marajó", "São Francisco", "Araguaia", "Madre de Dios", "Marañón", "Branco", "Orinoco", "Purús", "Ucayali", "Japurá", "Caquetá", "Huallaga"), ] #"Braco Menor", "Mamoré", "Juruá", "Jari", "Putumayo", 

#read the shapefile of coastline
brazil_coastline <- st_read("~/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_coastline_shapefile/ne_10m_coastline.shp")
## Reading layer `ne_10m_coastline' from data source 
##   `/Users/elsemikkelsen/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_coastline_shapefile/ne_10m_coastline.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 4133 features and 3 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -85.22194 xmax: 180 ymax: 83.6341
## Geodetic CRS:  WGS 84
#read the country borders
brazil_borders <- st_read("~/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_borders_shapefile/ne_10m_admin_0_countries.shp")
## Reading layer `ne_10m_admin_0_countries' from data source 
##   `/Users/elsemikkelsen/Documents/GitHub/Ceratopipra/Writing/Manuscript_drafts/Figures/Figure1.1_map/brazil_borders_shapefile/ne_10m_admin_0_countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 258 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.6341
## Geodetic CRS:  WGS 84
#firstly, to reduce overplotting I am going to add some offsets to the longitude of samples from the same locality. This way their points (bars) will be plotted side-by-side instead of on top of each other. I will arbitrarily make side-by-side points have an offset of "1" between them. For example, if there are three samples from the same locality, I will give the first an offset of -1, the second an offset of 0, and the third an offset of +1. These offsets can later be arbitrarily scaled as needed to adjust how far apart they get plotted (which will depend on how wide we want to plot our bars.)

#we will make a dataframe where column 1 is the sample's codename as it appears in the STRUCTURE data we will plot, and column 2 is the mannually-chosen offset to give each sample.
offsets <- data.frame(Code=c("UNA002", "UNA018", "UNA088", "UNA110", "UNA124", "MT087", "MT123", "PPS352", "PPS041", "OP100", "AMZ551", "AMZ610", "PPS221", "UFAC305", "BR36440", "TLP123", "TLP025", "MSF107", "PPS174", "CPE012", "CPE025", "CPE049", "CPE069", "CPEII008", "MAR092", "BR036", "UFAC1007", "BR033", "UFAC476", "GAPTO259", "GAPTO268", "GAPTO277", "GAPTO327", "CAM117", "OM022", "OM112", "OM172", "MPDS628", "PIME521", "PIME522", "PIME482", "PIME169", "AMANA137", "CUJ072", "PIME460", "AMANA034", "CUJ027", "PUC007", "PUC008", "RDO007", "PIME062", "PIME079", "AMA340", "AMA601", "AMA656", "MF005", "B4615", "B5082", "FPR006", "FPR015", "FPR021", "PIME134", "PIME135", "UHE442", "WM281", "WM293", "MAM010", "PIME024", "PPBIO221", "PPBIO305"), offset=c(-2, -1, 0, 1, 2, -0.5, 0.5, 0, 0, 0, -0.5, 0.5, 0, 0, 0, 0, 0, -0.5, 0.5, -2, -1, 0, 1, 2, 0, 0, 0, 0, 0, -1.5, -0.5, 0.5, 1.5, 0, -1, 0, 1, 0, -0.5, 0.5, 0, -0.5, 0.5, 0, 0, 0, 0, -0.5, 0.5, 0, -0.5, 0.5, -1, 0, 1, 0, -0.5, 0.5, -1, 0, 1, -0.5, 0.5, 0, -0.5, 0.5, 0, 0, -0.5, 0.5))

#load the data for K=2
data <- load_clumppk2(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE) %>%
  select(!(avg_assignment)) %>%
  spread(key=pop, value=assignment_p)

#add the offset data to the STRUCTURE dataframe
offset_data <- merge(data, offsets, by="Code", all.x = T)
#replace NAs with 0, if any are present
offset_data$offset[is.na(offset_data$offset)] <- 0

ggplot(offset_data %>% mutate(offset=offset/1.9))+
  #geom_sf(data = atlantic, color = "#defade", fill = "#defade") +
  #geom_sf(data = amazon, color = "#9CD089", fill = "#9CD089") +
  geom_sf(data = Cerrub_range, color = "#ffcccb", fill = "#ffcccb") +
  geom_sf(data = brazil_rivers, color = "#38afcd", linewidth=1) +
  geom_sf(data = brazil_coastline, color = "black") +
  geom_sf(data = brazil_borders, color = "black", fill = NA) +
  #geom_sf(data = amazon, color = "black", fill=NA) +
  #coord_sf(xlim = c(-80, -34), ylim = c(-24, 0), expand = FALSE) +
  coord_sf(xlim = c(-80, -34), ylim = c(-17, 0), expand = FALSE) +
     geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+1.5), color="black")+
  geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+as.numeric(p1)*1.5), fill="#0072B2")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p1*1.5), xmax = Longitude+offset + 0.5, ymax = Latitude+(p1*1.5)+(as.numeric(p2)*1.5)), fill="#E69F00")+
 theme_void()+
  theme(legend.position = "none")

ggsave("map_K2.pdf")
## Saving 7 x 5 in image
#load the data for K=3
data <- load_clumppk3(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=FALSE) %>%
  select(!(avg_assignment)) %>%
  spread(key=pop, value=assignment_p)

#add the offset data to the STRUCTURE dataframe
offset_data <- merge(data, offsets, by="Code", all.x = T)
#replace NAs with 0, if any are present
offset_data$offset[is.na(offset_data$offset)] <- 0

ggplot(offset_data %>% mutate(offset=offset/1.9))+
  #geom_sf(data = atlantic, color = "#defade", fill = "#defade") +
  #geom_sf(data = amazon, color = "#9CD089", fill = "#9CD089") +
  geom_sf(data = Cerrub_range, color = "#ffcccb", fill = "#ffcccb") +
  geom_sf(data = brazil_rivers, color = "#38afcd", linewidth=1) +
  geom_sf(data = brazil_coastline, color = "black") +
  geom_sf(data = brazil_borders, color = "black", fill = NA) +
  #geom_sf(data = amazon, color = "black", fill=NA) +
  #coord_sf(xlim = c(-80, -34), ylim = c(-24, 0), expand = FALSE) +
  coord_sf(xlim = c(-80, -34), ylim = c(-17, 0), expand = FALSE) +
     geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+1.5), color="black")+
  geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+as.numeric(p3)*1.5), fill="#0072B2")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p3*1.5), xmax = Longitude+offset + 0.5, ymax = Latitude+(p3*1.5)+(as.numeric(p2)*1.5)), fill="#E69F00")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p3*1.5)+(as.numeric(p2)*1.5), xmax = Longitude+offset + 0.5, ymax = Latitude+(p3*1.5)+(as.numeric(p2)*1.5)+(as.numeric(p1)*1.5)), fill="#F0E442")+
 theme_void()+
  theme(legend.position = "none")

ggsave("map_K3.pdf")
## Saving 7 x 5 in image
#load the data for K=4
data <- load_clumppk4(species="Ceratopipra_rubrocapilla", Dataset="Cerrub.AC4.37_0.2_5_20.0", folder="200k1000k_POPALPHAS", greedy=TRUE) %>%
  select(!(avg_assignment)) %>%
  spread(key=pop, value=assignment_p)

offset_data <- merge(data, offsets, by="Code", all.x = T)
offset_data$offset[is.na(offset_data$offset)] <- 0

ggplot(offset_data %>% mutate(offset=offset/1.9))+
  #geom_sf(data = atlantic, color = "#defade", fill = "#defade") +
  #geom_sf(data = amazon, color = "#9CD089", fill = "#9CD089") +
  geom_sf(data = Cerrub_range, color = "#ffcccb", fill = "#ffcccb") +
  geom_sf(data = brazil_rivers, color = "#38afcd", linewidth=1) +
  geom_sf(data = brazil_coastline, color = "black") +
  geom_sf(data = brazil_borders, color = "black", fill = NA) +
  #geom_sf(data = amazon, color = "black", fill=NA) +
 # coord_sf(xlim = c(-80, -34), ylim = c(-24, 0), expand = FALSE) +
  coord_sf(xlim = c(-80, -34), ylim = c(-17, 0), expand = FALSE) +
     geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+1.5), color="black")+
     geom_rect(aes(xmin = Longitude+offset, ymin = Latitude, xmax = Longitude+offset + 0.5, ymax = Latitude+as.numeric(p1)*1.5), fill="#CC79A7")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p1*1.5), xmax = Longitude+offset + 0.5, ymax = Latitude+(p1*1.5)+(as.numeric(p3)*1.5)), fill="#009E73")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p1*1.5)+(as.numeric(p3)*1.5), xmax = Longitude + 0.5+offset, ymax = Latitude+(p1*1.5)+(as.numeric(p3)*1.5)+(as.numeric(p4)*1.5)), fill="#F0E442")+
      geom_rect(aes(xmin = Longitude+offset, ymin = Latitude+(p1*1.5)+(as.numeric(p3)*1.5)+(as.numeric(p4)*1.5), xmax = Longitude+offset + 0.5, ymax = Latitude+(p1*1.5)+(as.numeric(p3)*1.5)+(as.numeric(p4)*1.5)+(as.numeric(p2)*1.5)), fill="#E69F00")+
 theme_void()+
  theme(legend.position = "none")

ggsave("map_K4.pdf")
## Saving 7 x 5 in image

compare with snmf

Lastly, I will compare the results of STRUCTURE with snmf. Results are qualitatively similar, though snmf seems to make our samples with more missing data assigned with more intermediate population assignments.

load_Ceratopiprasnmfk2 <- function(folder, Dataset, samplenames){
  Cerysnmf_data_k2 <- read_delim(col_names=c("p1", "p2"), file=paste(folder, "/", Dataset, ".2.Q", sep=""), delim=" ", trim_ws=TRUE, show_col_types = FALSE) 
  Cerysnmf_data_k2$Code <- samples
  Cerysnmf_data_k2 <- merge(Cerysnmf_data_k2, rubrocapilla_metadata, by="Code")
  Cerysnmf_data_k2_long <- Cerysnmf_data_k2 %>%
  arrange(Longitude) %>%
  arrange(Population)%>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p2) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(Cerysnmf_data_k2_long)
}

load_Ceratopiprasnmfk3 <- function(folder, Dataset, samplenames){
  Cerysnmf_data_k2 <- read_delim(col_names=c("p1", "p2", "p3"), file=paste(folder, "/", Dataset, ".3.Q", sep=""), delim=" ", trim_ws=TRUE, show_col_types = FALSE) 
  Cerysnmf_data_k2$Code <- samples
  Cerysnmf_data_k2 <- merge(Cerysnmf_data_k2, rubrocapilla_metadata, by="Code")
  Cerysnmf_data_k2_long <- Cerysnmf_data_k2 %>%
  arrange(Longitude) %>%
  arrange(Population)%>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p3) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(Cerysnmf_data_k2_long)
}

load_Ceratopiprasnmfk4 <- function(folder, Dataset, samplenames){
  Cerysnmf_data_k2 <- read_delim(col_names=c("p1", "p2", "p3", "p4"), file=paste(folder, "/", Dataset, ".4.Q", sep=""), delim=" ", trim_ws=TRUE, show_col_types = FALSE) 
  Cerysnmf_data_k2$Code <- samples
  Cerysnmf_data_k2 <- merge(Cerysnmf_data_k2, rubrocapilla_metadata, by="Code")
  Cerysnmf_data_k2_long <- Cerysnmf_data_k2 %>%
  arrange(Longitude) %>%
  arrange(Population)%>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p4) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(Cerysnmf_data_k2_long)
}

load_Ceratopiprasnmfk5 <- function(folder, Dataset, samplenames){
  Cerysnmf_data_k2 <- read_delim(col_names=c("p1", "p2", "p3", "p4", "p5"), file=paste(folder, "/", Dataset, ".5.Q", sep=""), delim=" ", trim_ws=TRUE, show_col_types = FALSE) 
  Cerysnmf_data_k2$Code <- samples
  Cerysnmf_data_k2 <- merge(Cerysnmf_data_k2, rubrocapilla_metadata, by="Code")
  Cerysnmf_data_k2_long <- Cerysnmf_data_k2 %>%
  arrange(Longitude) %>%
  arrange(Population)%>%
   mutate(Code=factor(Code, levels=Code)) %>%
  gather(key=pop, value=assignment_p, p1:p5) %>%
    group_by(pop) %>%
    mutate(avg_assignment=mean(as.numeric(assignment_p))) %>%
    ungroup() %>%
    mutate(pop=fct_reorder(pop,avg_assignment))
  return(Cerysnmf_data_k2_long)
}

samples <- c("AMA340","AMA601","AMA656","AMANA034","AMANA137","AMZ551","AMZ610","B4615","B5082","BR033","BR036","BR36440","CAM117","CPE012","CPE025","CPE049","CPE069","CPEII008","CUJ027","CUJ072","FPR006","FPR015","FPR021","GAPTO259","GAPTO268","GAPTO277","GAPTO327","MAM010","MAR092","MF005","MPDS628","MSF107","MT087","MT123","OM022","OM112","OM172","OP100","PIME024","PIME062","PIME079","PIME134","PIME135","PIME169","PIME460","PIME482","PIME521","PIME522","PPBIO221","PPBIO305","PPS041","PPS174","PPS221","PPS352","PUC008","RDO007","TLP025","TLP123","UFAC1007","UFAC305","UFAC476","UHE442","UNA002","UNA018","UNA088","UNA110","UNA124","WM281","WM293")

#Cerysnmf_data_k2 <- load_Ceratopiprasnmfk2(folder="snmf", Dataset="Cerrub.AC4.37_0.2_5_20.0", samples)

#Save K=2 and 3 with the fullsearch algorithm of CLUMPP (fullsearch could not completefor K=4)
cbbPalette <- c("#E69F00", "#0072B2", "#F0E442", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_Ceratopiprasnmfk2(folder="snmf", Dataset="Cerrub.AC4.37_0.2_5_20.0", samples), title="K=2: AC4.37_0.2_5_20.0 snmf")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `expand_scale()` was deprecated in ggplot2 3.3.0.
## ℹ Please use `expansion()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("snmf/snmf_k2.pdf")

cbbPalette <- c("#E69F00", "#F0E442", "#0072B2", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_Ceratopiprasnmfk3(folder="snmf", Dataset="Cerrub.AC4.37_0.2_5_20.0", samples), title="K=3: AC4.37_0.2_5_20.0 snmf")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("snmf/snmf_k3.pdf")

cbbPalette <- c("#F0E442", "#E69F00", "#CC79A7", "#009E73", "#E69F00", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_Ceratopiprasnmfk4(folder="snmf", Dataset="Cerrub.AC4.37_0.2_5_20.0", samples), title="K=4: AC4.37_0.2_5_20.0 snmf")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ggsave("snmf/snmf_k4.pdf")

cbbPalette <- c("#F0E442", "#E69F00", "#CC79A7", "#009E73", "#56B4E9", "#56B4E9", "#CC79A7")
plot_Ceratopipra2(load_Ceratopiprasnmfk5(folder="snmf", Dataset="Cerrub.AC4.37_0.2_5_20.0", samples), title="K=5: AC4.37_0.2_5_20.0 snmf")+
  theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.title.x = element_blank(), legend.position = "none")+
  scale_fill_manual(values=cbbPalette)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.